#include "opencl_source_map.hpp" 
namespace MNN { 
// OpenCL C source text for the roi_pooling kernel, stored as one string
// constant assembled from adjacent string literals.
// NOTE(review): the layout suggests this file is generated from a .cl source;
// if so, edit that source and regenerate instead of editing here - confirm.
//
// What the kernel text does for each work-item:
//   - Global ids 0, 1 and 2 are read as out_channel_idx, out_width_idx and
//     out_hb_idx. out_hb_idx is split into an ROI index
//     (out_hb_idx / out_height) and an output row (out_hb_idx % out_height).
//   - Five ROI values are read from the roi image: an input batch index,
//     then x1, y1, x2, y2. The four coordinates are multiplied by
//     spatial_scale and rounded. Where the values sit in the image depends on
//     the ROI_C1H1W5 / ROI_C5H1W1 macros (or neither being defined).
//   - The ROI extent is divided by out_width and out_height to get a bin
//     size; the bin of this work-item is clamped to [0, in_width] and
//     [0, in_height].
//   - An empty bin writes zeros. Otherwise the result is the running fmax of
//     all input texels read over the bin, starting from MIN_VALUE (-FLT_MAX).
//   - The work-item returns without writing anything when it lies outside the
//     global size or when the ROI batch index is >= in_batch.
//
// FLOAT4, RI_F and WI_F are used but not defined in this text; presumably
// the host code supplies them through build options - verify against caller.
const char* roi_pooling = 
"#ifdef MNN_SUPPORT_FP16\n"
"#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
"#endif\n"
// Helper macros: the three global-size kernel parameters and the early return
// for ids outside the global size. The empty string literals inside the next
// two lines are plain adjacent-literal concatenation and add no characters.
"#define GLOBAL_SIZE_3_DIMS "" __private const int global_size_dim0,__private const int global_size_dim1,__private const int global_size_dim2,\n"
"#define DEAL_NON_UNIFORM_DIM3(input1, input2, input3) "" if (input1 >= global_size_dim0 || input2 >= global_size_dim1 || input3 >= global_size_dim2) { "" return; "" }\n"
"__constant sampler_t SAMPLER=CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;\n"
"#define MIN_VALUE -FLT_MAX\n"
"// Supported data type: half/float\n"
"__kernel void roi_pooling(GLOBAL_SIZE_3_DIMS __read_only image2d_t input,__read_only image2d_t roi,\n"
" __private const int in_height,__private const int in_width,__private const int in_batch,\n"
" __private const int out_height,__private const int out_width,__private const float spatial_scale,\n"
" __write_only image2d_t output) {\n"
" const int out_channel_idx=get_global_id(0);\n"
" const int out_width_idx=get_global_id(1);\n"
" const int out_hb_idx=get_global_id(2);\n"
" DEAL_NON_UNIFORM_DIM3(out_channel_idx,out_width_idx,out_hb_idx);\n"
" const int roi_batch_idx=out_hb_idx/out_height;\n"
" const int out_height_idx=out_hb_idx % out_height;\n"
// Case ROI_C1H1W5: five texels at x = 0..4 on row roi_batch_idx; only the .x
// component of each texel is used.
"#if defined ROI_C1H1W5\n"
" FLOAT4 roi_0=RI_F(roi,SAMPLER,(int2)(0,roi_batch_idx));\n"
" int input_batch=roi_0.x;\n"
" if(input_batch >= in_batch){\n"
" return;\n"
" }\n"
" FLOAT4 roi_1=RI_F(roi,SAMPLER,(int2)(1,roi_batch_idx));\n"
" FLOAT4 roi_2=RI_F(roi,SAMPLER,(int2)(2,roi_batch_idx));\n"
" FLOAT4 roi_3=RI_F(roi,SAMPLER,(int2)(3,roi_batch_idx));\n"
" FLOAT4 roi_4=RI_F(roi,SAMPLER,(int2)(4,roi_batch_idx));\n"
" int x1=round(roi_1.x*spatial_scale);\n"
" int y1=round(roi_2.x*spatial_scale);\n"
" int x2=round(roi_3.x*spatial_scale);\n"
" int y2=round(roi_4.x*spatial_scale);\n"
// Case ROI_C5H1W1: two texels at x = 0 and x = 1 on row roi_batch_idx. The
// batch index, x1, y1 and x2 come from the four components of the first
// texel; y2 comes from the .x component of the second.
"#elif defined ROI_C5H1W1\n"
" FLOAT4 roi_0=RI_F(roi,SAMPLER,(int2)(0,roi_batch_idx));\n"
" int input_batch=roi_0.x;\n"
" if(input_batch >= in_batch){\n"
" return;\n"
" }\n"
" FLOAT4 roi_1=RI_F(roi,SAMPLER,(int2)(1,roi_batch_idx));\n"
" int x1=round(roi_0.y*spatial_scale);\n"
" int y1=round(roi_0.z*spatial_scale);\n"
" int x2=round(roi_0.w*spatial_scale);\n"
" int y2=round(roi_1.x*spatial_scale);\n"
// Default case: five texels at x = 0 on rows roi_batch_idx*5 up to
// roi_batch_idx*5+4; only the .x component of each texel is used.
"#else\n"
" const int roi_batch_offset=roi_batch_idx*5;\n"
" FLOAT4 roi_0=RI_F(roi,SAMPLER,(int2)(0,roi_batch_offset));\n"
" int input_batch=roi_0.x;\n"
" if(input_batch >= in_batch){\n"
" return;\n"
" }\n"
" FLOAT4 roi_1=RI_F(roi,SAMPLER,(int2)(0,roi_batch_offset+1));\n"
" FLOAT4 roi_2=RI_F(roi,SAMPLER,(int2)(0,roi_batch_offset+2));\n"
" FLOAT4 roi_3=RI_F(roi,SAMPLER,(int2)(0,roi_batch_offset+3));\n"
" FLOAT4 roi_4=RI_F(roi,SAMPLER,(int2)(0,roi_batch_offset+4));\n"
" int x1=round(roi_1.x*spatial_scale);\n"
" int y1=round(roi_2.x*spatial_scale);\n"
" int x2=round(roi_3.x*spatial_scale);\n"
" int y2=round(roi_4.x*spatial_scale);\n"
"#endif\n"
// ROI extent (forced to at least 1 in each direction), bin size, and the
// start/end of this bin clamped to the input size. Start uses floor and end
// uses ceil of the scaled bin position.
" int roiW=max(x2-x1+1,1);\n"
" int roiH=max(y2-y1+1,1);\n"
" float binSizeW=(float)roiW/(float)out_width;\n"
" float binSizeH=(float)roiH/(float)out_height;\n"
" int hStart=min(max(y1+(int)floor(out_height_idx*binSizeH),0),in_height);\n"
" int hEnd=min(max(y1+(int)ceil((out_height_idx+1)*binSizeH),0),in_height);\n"
" int hLen=hEnd-hStart;\n"
" int wStart=min(max(x1+(int)floor(out_width_idx*binSizeW),0),in_width);\n"
" int wEnd=min(max(x1+(int)ceil((out_width_idx+1)*binSizeW),0),in_width);\n"
" int wLen=wEnd-wStart;\n"
// Output x coordinate is out_channel_idx*out_width + out_width_idx; the
// output y coordinate is out_hb_idx.
" const int pos=mad24(out_channel_idx,out_width,out_width_idx);\n"
" const FLOAT4 zero_vec=(FLOAT4)(0);\n"
// A bin with no rows or no columns after clamping produces zeros.
" if (wLen <= 0 || hLen <= 0) {\n"
" WI_F(output,(int2)(pos,out_hb_idx),zero_vec);\n"
" return;\n"
" }\n"
" FLOAT4 res=(FLOAT4)(MIN_VALUE);\n"
" const int in_height_start=hStart;\n"
" const int in_width_start=wStart;\n"
// Input x is offset by out_channel_idx*in_width and input y by
// input_batch*in_height.
" const int in_channel_offset=mul24(out_channel_idx,in_width);\n"
" const int in_height_offset=mul24(input_batch,in_height);\n"
// NOTE(review): batch_idx gets the same value as in_height_offset and is not
// read anywhere later in this kernel text.
" const int batch_idx=mul24(input_batch,in_height);\n"
// Scan every texel of the bin and keep the running fmax.
" for (int height=0; height<hLen; ++height) {\n"
" int in_height_idx=in_height_start+height;\n"
" for (int width=0; width<wLen; ++width) {\n"
" int in_width_idx=in_width_start+width;\n"
" FLOAT4 in=RI_F(input,SAMPLER,(int2)(in_channel_offset+in_width_idx,in_height_offset+in_height_idx));\n"
" res=fmax(res,in);\n"
" }\n"
" }\n"
" WI_F(output,(int2)(pos,out_hb_idx),res);\n"
"}\n"
;
}
